Assume we have a continuous piecewise-linear function $f(x) = a_l x + b$ for $x < 0$ and $f(x) = a_r x + b$ for $x \ge 0$. Can we detect the "jump in slope" at $x=0$ from a noisy dataset? Here, noisy means that we add iid $\mathcal{N}(0, \sigma^2)$-distributed Gaussian noise to each $f(x_i)$.


In [78]:
from tools.plot import plot
# Ground-truth model parameters and sampling setup.
a_l, a_r = 1., -1.   # slope left / right of the kink
b = 4                # shared intercept at x = 0
sigma = 2.           # noise standard deviation
n_samples = 100

def twolin(x, xc, left_slope, right_slope, offset):
    """Continuous two-piece linear function hinged at xc.

    Evaluates left_slope*(x-xc)+offset for x < xc and
    right_slope*(x-xc)+offset for x >= xc (works elementwise on arrays).
    """
    left = (x < xc) * left_slope * (x - xc)
    right = (x >= xc) * right_slope * (x - xc)
    return left + right + offset

def f(x):
    """The noise-free ground-truth function with its kink at x = 0."""
    return twolin(x, 0, a_l, a_r, b)

# Draw sample locations uniformly and add iid Gaussian noise to f.
xs = np.random.uniform(low=-3, high=3, size=n_samples)
ys = f(xs) + np.random.normal(scale=sigma, size=n_samples)

plot(f, (-3, 3))
pl.plot(xs, ys, lw=0, marker='x')


Out[78]:
[<matplotlib.lines.Line2D at 0x7fbbf8fca950>]

In [79]:
def error(f):
    """Mean squared error of model *f* on the global dataset (xs, ys)."""
    residuals = f(xs) - ys
    return np.sum(residuals ** 2) / len(xs)

In [80]:
def get_split_fit(xc):
    """Least-squares fit of a continuous two-piece linear model hinged at xc.

    Fits y = a*(x - xc) + c for x < xc and y = b*(x - xc) + c for x >= xc
    to the global data (xs, ys), with the intercept c shared by both pieces
    (closed-form solution of the joint normal equations).

    Returns (a, b, c) on success, or False when either side of the split
    has fewer than 2 samples (the fit would be under-determined).
    """
    sel = xs < xc
    n_left = int(sel.sum())
    if n_left < 2 or n_left > len(sel) - 2:
        return False

    w = n_left / len(sel)  # fraction of samples left of the hinge
    m = np.mean

    # Per-side pieces of the normal equations (centered at xc).
    def num(x, y):
        return m(y) - m(x - xc) * m((x - xc) * y) / m((x - xc) ** 2)

    def den(x):
        return m(x - xc) ** 2 / m((x - xc) ** 2)

    def slope(x, y, c):
        # c is passed explicitly (the original closed over a later-assigned
        # global, which was fragile).
        return (m((x - xc) * y) - c * m(x - xc)) / m((x - xc) ** 2)

    # BUGFIX: negate the boolean mask with '~', not unary '-'; '-' on a
    # boolean array is deprecated/removed in modern NumPy.
    left_x, left_y = xs[sel], ys[sel]
    right_x, right_y = xs[~sel], ys[~sel]

    c = (w * num(left_x, left_y) + (1 - w) * num(right_x, right_y)) / \
        (1 - w * den(left_x) - (1 - w) * den(right_x))
    a = slope(left_x, left_y, c)
    b = slope(right_x, right_y, c)

    return a, b, c

In [81]:
# Scan candidate hinge positions and plot the fit error at each one.
for xc in np.linspace(-2, 2, 50):
    vals = get_split_fit(xc)
    if vals is False:
        # BUGFIX: get_split_fit returns False when one side of the split has
        # too few samples; unpacking *vals would raise TypeError. Skip it.
        continue
    pl.scatter([xc], error(lambda x: twolin(x, xc, *vals)))



In [83]:
# Fit with the hinge placed at the true kink location and overlay the
# ground truth, the noisy samples, and the recovered model.
xc = 0
fit = get_split_fit(xc)
plot(f, (-3, 3))
pl.plot(xs, ys, lw=0, marker='x')
plot(lambda x: twolin(x, xc, *fit), (-3, 3))
print(fit)


(0.93151053098532355, -0.66037084723455841, 3.921483985453853)

Now, can we do this in a Bayesian way?


In [2]:
import pymc as pm

In [ ]: